
*Filename: 2_child-parent-linked-IDs.do
*Created: 20201012
*Last edited:20210823 (Added 2019 survey data from March 2021 release, so now use pid19.dta)

/*Description: 
	Cleans the parent-identification file, identifying a mother and father by 
	first considering only parents that have been in a sample family; of these
	parents, use biological parent if available; if not, use adoptive parent.
	
	The PSID's parent-identification file (pid19.dta) has a record for each
	individual living in a PSID family at the time of any interview, and for 
	individuals who have never been in the study themselves but have been 
	identified as the child of someone who has. 
	
	Unique individuals are identified with their 1968 Family ID
	and their 1968 Sequence ID (Person Number). The key elements of this
	dataset are the corresponding IDs of any biological or adoptive mothers
	or fathers. 
	
	The created dataset contains child ID - mother ID - father ID records,
	which facilitates linking the child and parent income records from the
	family files. (Early PSID studies did not rely on parent-child links,
	and just used the family files themselves, measuring "parent" income
	as that of the head/wife of the household the child was living in at 
	a particular age.)
	
	The raw pid19.dta file also contains indicators for the source information
	(how/when it was determined that a given individual is the person's mother or father).
	For example, PID10 is an indicator for whether the birth mother was identified
	by the interviewer in the 1983/84 survey, while PID11 is the same 
	indicator, but for a link coded by PSID staff during family composition
	data editing.
*/


******************************************************* 
* Session setup: start from an empty dataset, disable output paging, close
* any log left open by a previous run, and raise the variable limit.
clear 
//version 16.0
set more off
capture log close 
set maxvar 32000

* Both path globals are used below (log/save in projdata, raw input in rawdata).
* Stop early if either is undefined; otherwise the paths would collapse to
* "/child-parent-linked-IDs.log" etc. and point at the filesystem root.
if `"${projdata}"' == "" | `"${rawdata}"' == "" {
	display as error "globals projdata and rawdata must both be defined before running this do-file"
	exit 198
}

*Log file
* Path is quoted so that a projdata directory containing spaces still works.
log using "${projdata}/child-parent-linked-IDs.log", replace




* Load the raw parent-identification file. The path is quoted so that a
* rawdata directory containing spaces still works.
clear
use "${rawdata}/pid19.dta"

//replace zeros with missing because 0=Inap, no further information about mother/father
// PID2/PID3 are the individual's own IDs; the remaining variables are the
// ID/sequence pairs for biological and adoptive mothers and fathers.
// "of varlist" makes Stata verify that every listed variable exists before
// the first replace runs.
foreach var of varlist PID2 PID3 PID4 PID5 PID6 PID7 PID8 PID9 PID23 PID24 PID25 PID26 PID27 PID28 {
	replace `var'=. if `var'==0
}

* "CHILD" ID variables
  rename PID2 id1	// individual's 1968 Family ID
  rename PID3 seq1	// individual's 1968 Sequence number (also referred to as "Person number")

  * Combined person identifier: family ID in the leading digits, sequence
  * number in the last three. Stored as long because the default float type
  * represents integers exactly only up to 16,777,216, which is too fragile
  * for a merge key.
  * NOTE(review): uniqueness assumes seq1 is always below 1000 -- confirm
  * against the PSID codebook.
  gen long newid=(id1*1000)+seq1
    
	
	
	
	
* MOTHER ID variables (use biological mother first)
  gen byte m_adopt=.
  gen long m_id1=.
  gen long m_seq1=.

  /* 
  Candidate mothers are tried in priority order; the first candidate whose
  1968 ID and 1968 sequence number (Person Number) are BOTH non-missing is
  used, and later candidates are ignored for that individual:

    1. PID4/PID5  biological mother     -> m_adopt = 0
    2. PID6/PID7  1st adoptive mother   -> m_adopt = 1
    3. PID8/PID9  2nd adoptive mother   -> m_adopt = 1

  The "2nd" adoptive mother is used only if a "1st" adoptive mother was not
  available. It is unlikely this would ever be the case, but this allows for
  modifications later if we wanted to take the first adoptive mother that
  was, say, a sample member or a member of the SRC sample, etc.
  In pid19.dta: N=2 (out of >100k) individuals have a second adoptive mother.

  For each candidate a single selection flag is computed BEFORE any of the
  three target variables is modified, so all three replaces use exactly the
  same set of observations and do not depend on statement order.
  missing() is used instead of comparing with "." so that extended missing
  values (.a-.z) are never treated as valid IDs.
  */
  local is_adoptive 0
  foreach pair in "PID4 PID5" "PID6 PID7" "PID8 PID9" {
    local cand_id  : word 1 of `pair'
    local cand_seq : word 2 of `pair'

    // take this candidate only if no mother has been assigned yet and the
    // candidate's ID pair is complete
    tempvar use_cand
    gen byte `use_cand' = missing(m_id1) & missing(m_seq1) & !missing(`cand_id', `cand_seq')

    replace m_id1   = `cand_id'     if `use_cand'
    replace m_seq1  = `cand_seq'    if `use_cand'
    replace m_adopt = `is_adoptive' if `use_cand'
    drop `use_cand'

    // every candidate after the first (biological) one is an adoptive mother
    local is_adoptive 1
  }
  
  
  *Create unique individual ID for mother (to link to individual-level income data)
  * Stored as long: the default float type is exact only up to 16,777,216.
  gen long m_newid=(m_id1*1000)+m_seq1 if !missing(m_id1, m_seq1)
  
  
  
  
  
  
* FATHER ID variables (use biological father first)
  gen byte f_adopt=.
  gen long f_id1=.
  gen long f_seq1=.

  /* 
  Candidate fathers are tried in priority order; the first candidate whose
  1968 ID and 1968 sequence number (Person Number) are BOTH non-missing is
  used, and later candidates are ignored for that individual:

    1. PID23/PID24  biological father     -> f_adopt = 0
    2. PID25/PID26  1st adoptive father   -> f_adopt = 1
    3. PID27/PID28  2nd adoptive father   -> f_adopt = 1

  The "2nd" adoptive father is used only if a "1st" adoptive father was not
  available. As of 2019 (pid19.dta) N=0 individuals have non-zero values for
  the 2nd adoptive father variables.

  For each candidate a single selection flag is computed BEFORE any of the
  three target variables is modified, so all three replaces use exactly the
  same set of observations and do not depend on statement order.
  missing() is used instead of comparing with "." so that extended missing
  values (.a-.z) are never treated as valid IDs.
  */
  local is_adoptive 0
  foreach pair in "PID23 PID24" "PID25 PID26" "PID27 PID28" {
    local cand_id  : word 1 of `pair'
    local cand_seq : word 2 of `pair'

    // take this candidate only if no father has been assigned yet and the
    // candidate's ID pair is complete
    tempvar use_cand
    gen byte `use_cand' = missing(f_id1) & missing(f_seq1) & !missing(`cand_id', `cand_seq')

    replace f_id1   = `cand_id'     if `use_cand'
    replace f_seq1  = `cand_seq'    if `use_cand'
    replace f_adopt = `is_adoptive' if `use_cand'
    drop `use_cand'

    // every candidate after the first (biological) one is an adoptive father
    local is_adoptive 1
  }

  
  
  *Create unique individual ID for father (to link to individual-level income data)
  * Stored as long: the default float type is exact only up to 16,777,216.
  gen long f_newid=(f_id1*1000)+f_seq1 if !missing(f_id1, f_seq1)

  
*Clean up and save
rename PID1 releasenum

label var newid   "individual ID: (1968 family ID*1000) + 1968 sequence number"
label var m_newid "mother's individual ID: (m_id1*1000) + m_seq1"
label var f_newid "father's individual ID: (f_id1*1000) + f_seq1"
label var m_adopt "=0/1 for biological/adoptive mother"  
label var f_adopt "=0/1 for biological/adoptive father"
 
* Keep only the child ID, the mother/father link variables and the release number.
keep newid m_* f_* releasenum
order newid m_newid m_id1 m_seq1 m_adopt f_newid f_id1 f_seq1 f_adopt releasenum

* newid is the key used to link this file to other records. Warn (without
* stopping) if it has missing values or does not uniquely identify rows.
capture isid newid
if _rc {
	display as error "WARNING: newid is missing for some records or does not uniquely identify records"
}


*SAVE cleaned parent identification links
* Path is quoted so that a projdata directory containing spaces still works.
qui compress
save "${projdata}/child-parent-linked-IDs.dta", replace



clear
log close

*End 2_child-parent-linked-IDs.do*
